package com.dmcb.trade.business.crawlers.author;

import com.dmcb.common.business.services.BaseService;
import com.dmcb.common.web.conversion.JsonResult;
import com.dmcb.common.business.utils.WebClientUtil;
import com.dmcb.trade.controller.ArticleCrawlerController;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;

/**
 * Crawls listing pages of the huanqiu.com world channel and hands every
 * qualifying article link to {@link ArticleCrawlerController} for import.
 * Only the world channel listing is crawled.
 *
 * Created by 黑米 on 2017/4/21.
 */
@Service
public class HuanQiuCrawler extends BaseService {

    /** Prefix of the listing page URL; the page number and ".html" are appended. */
    private static final String LIST_URL_PREFIX = "http://world.huanqiu.com/article/";
    /** First listing page that is fetched (inclusive). */
    private static final int FIRST_PAGE = 3;
    /** Last listing page that is fetched (inclusive). */
    private static final int LAST_PAGE = 6;
    /**
     * First argument passed to {@code articleCrawlerController.crawl}.
     * NOTE(review): presumably the id of the author/account the articles are imported for - confirm.
     */
    private static final int IMPORT_TARGET_ID = 3508;
    /** Number of consecutive failed imports after which the run is aborted. */
    private static final int MAX_CONSECUTIVE_FAILURES = 10;
    /** Source name that makes an article eligible for import. */
    private static final String REQUIRED_SOURCE = "环球时报";
    /** Marker in the editor line that makes an article eligible for import. */
    private static final String EDITOR_MARKER = "责编";

    @Autowired
    private WebClientUtil webClientUtil;
    @Autowired
    private ArticleCrawlerController articleCrawlerController;

    /**
     * Walks listing pages {@value #FIRST_PAGE} to {@value #LAST_PAGE}, filters the linked
     * articles by source/editor and imports the eligible ones.
     *
     * @return an error result when a listing page cannot be loaded (getDoc returns null),
     *         when a listing page contains no article entries, or when
     *         {@value #MAX_CONSECUTIVE_FAILURES} imports fail in a row; otherwise a success
     *         result whose message contains the number of imported articles
     */
    public JsonResult crawl() {
        int consecutiveFailures = 0;
        int imported = 0;

        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            Document listing = webClientUtil.getDoc(LIST_URL_PREFIX + page + ".html");
            if (listing == null) {
                return error("解析失败");
            }
            Elements entries = listing.select("div.fallsFlow>ul>li");
            if (entries == null || entries.isEmpty()) {
                return error("没有获取到文章列表");
            }

            for (Element entry : entries) {
                String link = entry.select("em>a").attr("href");
                if (StringUtils.isBlank(link)) {
                    continue;
                }

                Document article = webClientUtil.getDoc(link);
                boolean failed;
                if (article == null) {
                    // The article page could not be loaded; count it as a failed import
                    // instead of dereferencing null, so a persistent outage still aborts the run.
                    failed = true;
                } else if (!isEligible(article)) {
                    // Filtered-out articles neither count as failure nor reset the failure streak.
                    continue;
                } else {
                    JsonResult jsonResult = articleCrawlerController.crawl(IMPORT_TARGET_ID, link);
                    failed = jsonResult.getStatus() != JsonResult.STATUS_SUCCESS;
                }

                if (failed) {
                    consecutiveFailures++;
                    // Abort on the 10th consecutive failure, matching the message text below.
                    if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                        return error("连续10次导入失败，请核实原因后再导入");
                    }
                    continue;
                }
                consecutiveFailures = 0;
                imported++;
            }
        }
        return success("成功添加" + imported + "条文章");
    }

    /**
     * Decides whether an article page qualifies for import: either its source link text
     * equals {@link #REQUIRED_SOURCE} or its editor line contains {@link #EDITOR_MARKER}.
     *
     * @param article parsed article page, must not be null
     * @return true when the article should be imported
     */
    private static boolean isEligible(Document article) {
        String source = article.select("strong#source_baidu>a").text();
        String editor = article.select("span#editor_baidu").text();
        return REQUIRED_SOURCE.equals(source)
                || (editor != null && editor.contains(EDITOR_MARKER));
    }
}
